Read in counts data for all samples
#setwd("C:/Users/elkin/Desktop/Git_Projects/rat_tce_rnaseq/results")
load(file="tce_rat_counts.rda")# all samples from merged count files
dim(all_samples_1_48)
>>> [1] 32011 54
Creating DESeq2 object with counts data (without RUVr correction)
ds.all<-DESeqDataSetFromMatrix(counts.rat [,1:48],colData=coldata_48,design=~Rep+Trt)
rownames(ds.all)<-all_samples_1_48[,1]
Adding RUV to DEseq object
ds <- ds.all
ds$Trt<- as.factor(ds$Trt)
design(ds) <- ~ Rep + Trt
set <- newSeqExpressionSet(counts(ds, normalized=F), phenoData=AnnotatedDataFrame(data=as.data.frame(colData(ds))))
y <- DGEList(counts=counts(ds, normalized=F), group=ds$Trt)
design <- model.matrix(~ds$Rep+ds$Trt)
y <- calcNormFactors(y, method="upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
resid <- residuals(fit, type="deviance")
ruv <- RUVr(x=set, cIdx=rownames(set), k=1, residuals=resid)
pData(ruv)
>>> Sex Rep Trt dam W_1
>>> N1_Control_Male M 1 C a -0.173385717
>>> N2_Control_Male M 2 C b 0.095977919
>>> N3_Control_Male M 3 C c 0.115402694
>>> N4_Control_Male M 4 C d -0.048217654
>>> N5_Control_Male M 5 C e 0.139165154
>>> N6_Control_Male M 6 C f 0.169629716
>>> N1_Control_Female F 1 C a -0.280283445
>>> N2_Control_Female F 2 C b -0.061672673
>>> N3_Control_Female F 3 C c -0.014556129
>>> N4_Control_Female F 4 C d -0.092380467
>>> N5_Control_Female F 5 C e 0.009302410
>>> N6_Control_Female F 6 C f 0.189290518
>>> N1_TCE_Male M 1 TCE g 0.031492322
>>> N2_TCE_Male M 2 TCE h -0.022613196
>>> N3_TCE_Male M 3 TCE i -0.046742082
>>> N4_TCE_Male M 4 TCE j 0.141470959
>>> N5_TCE_Male M 5 TCE k -0.187389881
>>> N6_TCE_Male M 6 TCE l -0.255424655
>>> N1_TCE_Female F 1 TCE g 0.142581537
>>> N2_TCE_Female F 2 TCE h 0.068782585
>>> N3_TCE_Female F 3 TCE i 0.131544243
>>> N4_TCE_Female F 4 TCE j 0.055772940
>>> N5_TCE_Female F 5 TCE k -0.054589124
>>> N6_TCE_Female F 6 TCE l -0.022547890
>>> N1_NAC_Male M 1 NAC m -0.196949899
>>> N2_NAC_Male M 2 NAC n 0.282700072
>>> N3_NAC_Male M 3 NAC o -0.162590824
>>> N4_NAC_Male M 4 NAC p 0.148027133
>>> N5_NAC_Male M 5 NAC q 0.072678314
>>> N6_NAC_Male M 6 NAC r 0.110705677
>>> N1_NAC_Female F 1 NAC m 0.100984751
>>> N2_NAC_Female F 2 NAC n -0.256488953
>>> N3_NAC_Female F 3 NAC o 0.218244316
>>> N4_NAC_Female F 4 NAC p -0.284915981
>>> N5_NAC_Female F 5 NAC q -0.009360384
>>> N6_NAC_Female F 6 NAC r -0.061986807
>>> N1_TCE_NAC_Male M 1 TCE_NAC s 0.223125362
>>> N2_TCE_NAC_Male M 2 TCE_NAC t -0.004343612
>>> N3_TCE_NAC_Male M 3 TCE_NAC u 0.053823502
>>> N4_TCE_NAC_Male M 4 TCE_NAC v 0.064296168
>>> N5_TCE_NAC_Male M 5 TCE_NAC w 0.042999925
>>> N6_TCE_NAC_Male M 6 TCE_NAC x -0.037427622
>>> N1_TCE_NAC_Female F 1 TCE_NAC s 0.196743794
>>> N2_TCE_NAC_Female F 2 TCE_NAC t -0.145893575
>>> N3_TCE_NAC_Female F 3 TCE_NAC u -0.268744168
>>> N4_TCE_NAC_Female F 4 TCE_NAC v -0.030933400
>>> N5_TCE_NAC_Female F 5 TCE_NAC w 0.015145223
>>> N6_TCE_NAC_Female F 6 TCE_NAC x -0.100449098
#make deseq new deseq object that has the RUV factors
ds.all.ruv<- DESeqDataSetFromMatrix(countData = counts(ruv), colData = pData(ruv),
design = ~ W_1 + Rep + Trt)
head(ds.all.ruv)
>>> class: DESeqDataSet
>>> dim: 6 48
>>> metadata(1): version
>>> assays(1): counts
>>> rownames(6): LOC108349478 LOC103690911 ... LOC102556098 LOC103690072
>>> rowData names(0):
>>> colnames(48): N1_Control_Male N2_Control_Male ... N5_TCE_NAC_Female
>>> N6_TCE_NAC_Female
>>> colData names(5): Sex Rep Trt dam W_1
PCA Plots with RUVr correction
rld.ds.all.ruv<- rlogTransformation(ds.all.ruv) #log transform
DESeq2::plotPCA(rld.ds.all.ruv, intgroup="Trt")

DESeq2::plotPCA(rld.ds.all.ruv, intgroup="Rep")

DESeq2::plotPCA(rld.ds.all.ruv, intgroup="Sex")

# Manually filtering out genes with count<6
keep <- rowMeans(counts(ds.all.ruv)) >= 6
ds.all.ruv <- ds.all.ruv[keep,]
dim (ds.all.ruv)
>>> [1] 16661 48
Run DEseq2 Analysis with RUV (males)
ds.ruv.male<- ds.all.ruv[ , ds.all.ruv$Sex == "M" ] #subsetting DESeq2 object by sex
head(ds.ruv.male) #confirming male only samples
>>> class: DESeqDataSet
>>> dim: 6 24
>>> metadata(1): version
>>> assays(1): counts
>>> rownames(6): LOC108349479 LOC102556098 ... Raet1e LOC102547056
>>> rowData names(0):
>>> colnames(24): N1_Control_Male N2_Control_Male ... N5_TCE_NAC_Male
>>> N6_TCE_NAC_Male
>>> colData names(5): Sex Rep Trt dam W_1
ds.ruv.male<- DESeq(ds.ruv.male) # with RUV Does the Empirical Bayes test
ds.ruv.male<- nbinomWaldTest(ds.ruv.male, maxit=10000) # Corrects Empirical Bayes test warning
ds.male.ruv.tce<- results(ds.ruv.male, contrast =c("Trt","TCE","C"), independentFiltering = FALSE)# Extract results table
ds.male.ruv.nac<- results(ds.ruv.male, contrast =c("Trt","NAC","C"), independentFiltering = FALSE) # Extract results table
ds.male.ruv.tce.nac<- results(ds.ruv.male, contrast =c("Trt","TCE_NAC","C"), independentFiltering = FALSE)# Extract results table
Resultss (males)
#Control v. TCE (male)
mcols(ds.male.ruv.tce)
>>> DataFrame with 6 rows and 2 columns
>>> type description
>>> <character> <character>
>>> baseMean intermediate mean of normalized counts for all samples
>>> log2FoldChange results log2 fold change (MLE): Trt TCE vs C
>>> lfcSE results standard error: Trt TCE vs C
>>> stat results Wald statistic: Trt TCE vs C
>>> pvalue results Wald test p-value: Trt TCE vs C
>>> padj results BH adjusted p-values
summary(ds.male.ruv.tce,alpha=0.05)
>>>
>>> out of 16661 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up) : 24, 0.14%
>>> LFC < 0 (down) : 6, 0.036%
>>> outliers [1] : 0, 0%
>>> low counts [2] : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Reds")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.male.ruv.tce, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_Male.pdf", xlim=c(-9,9),
ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.male.ruv.tce, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
with(subset(ds.male.ruv.tce, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.male.ruv.tce, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[5]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]),
legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1 or < -1" , "FDR<0.05 and LogFC between -1 & 1" , "FDR<0.05 and LogFC >1 or < -1"))
abline(h=4.1, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. NAC (male)
mcols(ds.male.ruv.nac)
>>> DataFrame with 6 rows and 2 columns
>>> type description
>>> <character> <character>
>>> baseMean intermediate mean of normalized counts for all samples
>>> log2FoldChange results log2 fold change (MLE): Trt NAC vs C
>>> lfcSE results standard error: Trt NAC vs C
>>> stat results Wald statistic: Trt NAC vs C
>>> pvalue results Wald test p-value: Trt NAC vs C
>>> padj results BH adjusted p-values
summary(ds.male.ruv.nac,alpha=0.05)
>>>
>>> out of 16661 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up) : 0, 0%
>>> LFC < 0 (down) : 0, 0%
>>> outliers [1] : 0, 0%
>>> low counts [2] : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Reds")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.male.ruv.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_NAC_Male.pdf", xlim=c(-9,9),
ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.male.ruv.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.male.ruv.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.male.ruv.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]),
legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1 or < -1" , "FDR<0.05 and LogFC between -1 & 1" , "FDR<0.05 and LogFC >1 or < -1"))
abline(h=5.2, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. TCE+NAC (male)
palette <- brewer.pal(4, "Reds")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.male.ruv.tce.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_NAC_Male.pdf", xlim=c(-9,9),
ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.male.ruv.tce.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.male.ruv.tce.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.male.ruv.tce.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]),
legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1 or < -1" , "FDR<0.05 and LogFC between -1 & 1" , "FDR<0.05 and LogFC >1 or < -1"))
abline(h=3.6, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

Run DEseq2 Analysis with RUV (females)
ds.ruv.female<- ds.all.ruv[ , ds.all.ruv$Sex == "F" ] #subsetting DESeq2 object by sex
head(ds.ruv.female) #confirming female only samples
>>> class: DESeqDataSet
>>> dim: 6 24
>>> metadata(1): version
>>> assays(1): counts
>>> rownames(6): LOC108349479 LOC102556098 ... Raet1e LOC102547056
>>> rowData names(0):
>>> colnames(24): N1_Control_Female N2_Control_Female ... N5_TCE_NAC_Female
>>> N6_TCE_NAC_Female
>>> colData names(5): Sex Rep Trt dam W_1
ds.ruv.female<- DESeq(ds.ruv.female) # with RUV Does the Empirical Bayes test
ds.ruv.female<- nbinomWaldTest(ds.ruv.female, maxit=10000) # Corrects Empirical Bayes test warning
ds.female.ruv.tce<- results(ds.ruv.female, contrast =c("Trt","TCE","C"), independentFiltering = FALSE)# Extract results table
ds.female.ruv.nac<- results(ds.ruv.female, contrast =c("Trt","NAC","C"), independentFiltering = FALSE) # Extract results table
ds.female.ruv.tce.nac<- results(ds.ruv.female, contrast =c("Trt","TCE_NAC","C"), independentFiltering = FALSE)# Extract results table
Results (females)
#Control v. TCE (female)
mcols(ds.female.ruv.tce)
>>> DataFrame with 6 rows and 2 columns
>>> type description
>>> <character> <character>
>>> baseMean intermediate mean of normalized counts for all samples
>>> log2FoldChange results log2 fold change (MLE): Trt TCE vs C
>>> lfcSE results standard error: Trt TCE vs C
>>> stat results Wald statistic: Trt TCE vs C
>>> pvalue results Wald test p-value: Trt TCE vs C
>>> padj results BH adjusted p-values
summary(ds.female.ruv.tce,alpha=0.05)
>>>
>>> out of 16658 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up) : 388, 2.3%
>>> LFC < 0 (down) : 91, 0.55%
>>> outliers [1] : 0, 0%
>>> low counts [2] : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Greens")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.female.ruv.tce, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_female.pdf", xlim=c(-9,9),
ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.female.ruv.tce, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.female.ruv.tce, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.female.ruv.tce, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]),
legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1 or < -1" , "FDR<0.05 and LogFC between -1 & 1" , "FDR<0.05 and LogFC >1 or < -1"))
abline(h=2.8, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. NAC (female)
mcols(ds.female.ruv.nac)
>>> DataFrame with 6 rows and 2 columns
>>> type description
>>> <character> <character>
>>> baseMean intermediate mean of normalized counts for all samples
>>> log2FoldChange results log2 fold change (MLE): Trt NAC vs C
>>> lfcSE results standard error: Trt NAC vs C
>>> stat results Wald statistic: Trt NAC vs C
>>> pvalue results Wald test p-value: Trt NAC vs C
>>> padj results BH adjusted p-values
summary(ds.female.ruv.nac,alpha=0.05)
>>>
>>> out of 16658 with nonzero total read count
>>> adjusted p-value < 0.05
>>> LFC > 0 (up) : 0, 0%
>>> LFC < 0 (down) : 0, 0%
>>> outliers [1] : 0, 0%
>>> low counts [2] : 0, 0%
>>> (mean count < 0)
>>> [1] see 'cooksCutoff' argument of ?results
>>> [2] see 'independentFiltering' argument of ?results
palette <- brewer.pal(4, "Greens")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.female.ruv.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_NAC_female.pdf", xlim=c(-9,9),
ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.female.ruv.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.female.ruv.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.female.ruv.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]),
legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1 or < -1" , "FDR<0.05 and LogFC between -1 & 1" , "FDR<0.05 and LogFC >1 or < -1"))
abline(h=5.3, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)

#Control v. TCE+NAC (male)
mcols(ds.female.ruv.tce.nac)
summary(ds.female.ruv.tce.nac,alpha=0.05)
palette <- brewer.pal(4, "Greens")
par(las = 1, cex.lab = 1.25, cex.main = 1.25)
with(ds.female.ruv.tce.nac, plot(log2FoldChange, -log10(pvalue), pch=20, main="Rats_Control_vs_TCE_NAC_female.pdf", xlim=c(-9,9),
ylim=c(0,12), xlab =(Log[2]~Fold~Change), ylab=(-Log[10]~Pvalue), col = "black"))
with(subset(ds.female.ruv.tce.nac, padj<0.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[3]))
with(subset(ds.female.ruv.tce.nac, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[2]))
with(subset(ds.female.ruv.tce.nac, padj<0.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col=palette[4]))
legend("topleft", col = "black", fill = c(palette[1], palette[2], palette[3], palette[4]),
legend = c("FDR>0.05 and LogFC between -1 & 1" , "FDR>0.05 and LogFC >1 or < -1" , "FDR<0.05 and LogFC between -1 & 1" , "FDR<0.05 and LogFC >1 or < -1"))
abline(h=2.75, col="black", lty=3)
abline(v=1, col="black", lty=3)
abline(v=-1, col="black", lty=3)
Data Exploration plots
#MAplots (with RUVr)
DESeq2::plotMA(ds.male.ruv.tce)

#histogram of pvals
hist(ds.male.ruv.tce$pvalue)

#dispersion estimate plots
plotDispEsts(ds.ruv.male, ylim = c(1e-6, 10e1))
